
* ---------------------------------------------------------------------------
* Session setup, then build a CPI deflator normalized to 1 in the base year.
*   Reads : bls_prices_2.dta  (variables used here: year, cpi)
*   Writes: cpi_1993.dta      (year plus every variable matching cpi_*)
* ---------------------------------------------------------------------------
clear
capture log close
set more off

******* cpi 2
use "bls_prices_2.dta", clear

* normalize cpi to equal one in base years
foreach num in 1993 {
	* Look up the base-year CPI directly instead of spreading it across rows
	* with forward/backward fills. Stop if the base year is absent or
	* duplicated: otherwise the deflator would be silently all-missing, or
	* would depend on the sort order among tied years.
	quietly summarize cpi if year == `num', meanonly
	if r(N) != 1 {
		display as error "expected exactly one non-missing cpi observation for year `num', found " r(N)
		exit 459
	}
	gen cpi_`num' = cpi / r(mean)
}

* save
keep year cpi_*
sort year
save cpi_1993, replace


* ---------------------------------------------------------------------------
* Build the analysis file used by every later section.
*   Reads : cps_00003.dta, cpi_1993.dta
*   Writes: income_b1993.dta
* ---------------------------------------------------------------------------
use cps_00003, clear
sort year serial pernum

* Attach the deflator to each record by year. Unmatched rows are NOT dropped
* here (the keep below is deliberately left commented out, as in the original).
merge m:1 year using cpi_1993
*keep if _merge==3
drop _merge

* calculate real income
* NOTE(review): hhincome is divided as-is; if the extract uses a numeric
* "not in universe" sentinel for hhincome it is not filtered here - confirm.
gen hhincome_1993 = hhincome / cpi_1993

** keep household head only
* Rows with a missing relate are removed as well, exactly as "keep if relate==101" would.
drop if relate != 101

* Census division
* Map the listed region codes, in order, to division 1..9. Any region value
* not in the list leaves division missing.
gen division = .
local div = 0
foreach code in 11 12 21 22 31 32 33 41 42 {
	local ++div
	replace division = `div' if region == `code'
}

save income_b1993.dta, replace

*1120
* ---------------------------------------------------------------------------
* For each year 1993-2020, compute the asecfwt-weighted share of records in
* each of 25 real-income brackets (income measured by hhincome_1993).
*   Reads : income_b1993.dta
*   Writes: <year>_income_93.dta with hhfaminc_1993, hh_count, sumhh, weight
* ---------------------------------------------------------------------------

* Lower bounds of brackets 2..25. Bracket 1 is every non-missing income below
* the first cut; bracket 25 is open-ended at the top.
local cuts 3000 4000 5000 6000 7500 9000 10000 11000 12500 14000 15000 17500 ///
	20000 22500 25000 27500 30000 32500 35000 40000 45000 50000 75000 100000

forvalues i = 1993/2020 {
	* Load only the year being processed, rather than reloading and binning
	* the full multi-year file on every iteration.
	use if year == `i' using income_b1993.dta, clear

	* create real income groups
	* Each pass bumps the bracket for every record at or above the cut, so a
	* record ends up in bracket 1 + (number of cuts <= its income).
	gen hhfaminc_1993 = 1 if !missing(hhincome_1993)
	local grp = 1
	foreach cut of local cuts {
		local ++grp
		replace hhfaminc_1993 = `grp' if hhincome_1993 >= `cut' & !missing(hhincome_1993)
	}

	* NOTE(review): records with missing real income keep a missing bracket;
	* collapse retains that as its own group, so it counts toward sumhh and
	* the shares of brackets 1-25 then sum to less than one. Confirm intended.
	collapse (sum) asecfwt, by(hhfaminc_1993)
	rename asecfwt hh_count

	egen sumhh = total(hh_count)
	gen weight = hh_count / sumhh
	save `i'_income_93, replace
}



* regroup into 4
* Collapse the 25 income brackets of each yearly file into four plotting
* groups and sum their shares:
*   inc_plot 1 = brackets 1-16, 2 = brackets 17-20, 3 = brackets 21-24, 4 = bracket 25
*   Reads : <year>_income_93.dta
*   Writes: inc1993_<year>.dta with inc_plot and weight_<year>
* A missing bracket is left missing by recode and stays a separate group.
forvalues i = 1993/2020 {
	use `i'_income_93, clear
	recode hhfaminc_1993 (1/16=1) (17/20=2) (21/24=3) (25=4), generate(inc_plot)
	collapse (sum) weight, by(inc_plot)
	rename weight weight_`i'
	save inc1993_`i'.dta, replace
}

** combine years (a 1:1 merge on inc_plot, not an append)
* Start from the 1993 file and add one weight_<year> column per later year.
* The loop begins at 1994: merging the 1993 file with itself adds nothing.
*   Reads : inc1993_<year>.dta
*   Writes: inc1993_years.dta (inc_plot, weight_1993 ... weight_2020)
use inc1993_1993, clear
forvalues i = 1994/2020 {
	* nogenerate: no _merge variable is created, so none has to be dropped
	merge 1:1 inc_plot using inc1993_`i', nogenerate
}

* Persist the wide table; the next section starts with "clear", which would
* otherwise discard it.
save inc1993_years.dta, replace



****=== division change over years ***======


* ---------------------------------------------------------------------------
* For each year 1993-2020, compute every division's share of summed asecfwt.
*   Reads : income_b1993.dta
*   Writes: <year>_93division.dta with division, hh_count, sumhh, weight
* Records whose division is missing form their own group and count in sumhh.
* ---------------------------------------------------------------------------
forvalues yr = 1993/2020 {
	* Pull in just this year's records.
	use if year == `yr' using income_b1993.dta, clear

	* Summed weights per division, named hh_count directly.
	collapse (sum) hh_count = asecfwt, by(division)

	* Share of the year's total.
	egen sumhh = total(hh_count)
	gen weight = hh_count / sumhh

	save `yr'_93division, replace
}

* Build one wide table of division shares: a row per division and a
* weight_<year> column per year.
*   Reads : <year>_93division.dta
*   Writes: divsion_93years.dta (file name spelling kept exactly as it was)
use 1993_93division, clear
rename weight weight_1993

forvalues yr = 1994/2020 {
	* The incoming file brings its own "weight"; hh_count and sumhh already
	* exist in memory, so the master copies are kept. Rename weight at once
	* so the next merge can bring in another.
	merge 1:1 division using `yr'_93division, nogenerate
	rename weight weight_`yr'
}

* Only the shares are wanted in the output.
drop hh_count sumhh
save divsion_93years.dta, replace

** regroup into 4 regions
* Fold the nine divisions of each yearly file into four regions and sum their
* shares:
*   region 1 = divisions 1-2, 2 = divisions 3-4, 3 = divisions 5-7, 4 = divisions 8-9
*   Reads : <year>_93division.dta
*   Writes: region93_<year>.dta with region and weight_<year>
* A missing division is left missing by recode and stays a separate group.
forvalues i = 1993/2020 {
	use `i'_93division, clear
	recode division (1/2=1) (3/4=2) (5/7=3) (8/9=4), generate(region)
	collapse (sum) weight, by(region)
	rename weight weight_`i'
	save region93_`i'.dta, replace
}

** combine years (a 1:1 merge on region, not an append)
* Start from the 1993 file and add one weight_<year> column per later year.
* The loop begins at 1994: merging the 1993 file with itself adds nothing.
*   Reads : region93_<year>.dta
*   Writes: region93_years.dta (region, weight_1993 ... weight_2020)
use region93_1993, clear
forvalues i = 1994/2020 {
	* nogenerate: no _merge variable is created, so none is left behind
	merge 1:1 region using region93_`i', nogenerate
}

* Save the wide table so it survives the session; it also remains in memory.
save region93_years.dta, replace
